# Work from the project folder so the relative data path below resolves.
# NOTE(review): a hard-coded setwd() ties this script to one machine;
# consider an RStudio project or a path built with file.path() instead.
setwd("~/Desktop/DSCI_304/Final_Project")

# Data source:
# https://www.kaggle.com/datasets/datasnaek/chess?resource=download
games <- read.csv(file = "games.csv", header = TRUE)
# View(games)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Names of the 15 most frequently played openings, most common first.
top_openings <- games %>%
  count(opening_name, name = "n_games") %>%
  arrange(desc(n_games)) %>%
  head(15) %>%
  pull(opening_name)
# Keep only the games whose opening is one of the top 15.
games_top <- dplyr::filter(games, opening_name %in% top_openings)
# One row per opening/winner pair with its game count (`n`), the total
# number of games for that opening (`total`), and the fraction of the
# opening's games that fall in that row (`pct`).
summary_df <- games_top %>%
  count(opening_name, winner) %>%
  group_by(opening_name) %>%
  mutate(
    total = sum(n),
    pct = n / total
  ) %>%
  ungroup()
# Interactive stacked bar chart: one bar per opening, ordered by total
# number of games (largest first), stacked and coloured by winner.
plot_ly(
  summary_df,
  # reorder() sorts levels by increasing mean of -total, i.e. by
  # decreasing total, so the most played opening comes first.
  x = ~reorder(opening_name, -total),
  y = ~n,
  color = ~winner,
  type = "bar",
  # Pass the tooltip as `hovertext`, not `text`: on bar traces `text` is
  # also rendered as a label on the bars themselves, which clutters the
  # stacked segments. `hoverinfo = "text"` displays `hovertext` on hover.
  hovertext = ~paste0(
    "Winner: ", winner, "<br>",
    "Games: ", n, "<br>",
    "Win %: ", scales::percent(pct, accuracy = 0.1)
  ),
  hoverinfo = "text"
) %>%
  layout(
    barmode = "stack",
    title = "Number of Games by Opening and Winner",
    xaxis = list(title = "Opening Name"),
    yaxis = list(title = "Number of Games")
  )